{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "appid=''\n",
    "disk=''\n",
    "nic=''\n",
    "tz=''\n",
    "base_dir=''\n",
    "name=''\n",
    "notebook=''\n",
    "notebook_html=''\n",
    "proxy=''\n",
    "emails=''\n",
    "pr=''\n",
    "\n",
    "comp_appid=''\n",
    "comp_base_dir=''\n",
    "comp_name=''\n",
    "\n",
    "baseline_appid=''\n",
    "baseline_base_dir=''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "<style>\n",
    "div.output_stderr {\n",
    "background: #ffdd;\n",
    "display: none;\n",
    "}\n",
    "</style>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import findspark\n",
    "findspark.init()\n",
    "\n",
    "import os\n",
    "import time\n",
    "import sys\n",
    "from pyspark import SparkConf, SparkContext\n",
    "from pyspark.sql import SQLContext\n",
    "\n",
    "def get_py4jzip():\n",
    "    spark_home=os.environ['SPARK_HOME']\n",
    "    py4jzip = !ls {spark_home}/python/lib/py4j*.zip\n",
    "    return py4jzip[0]\n",
    "\n",
    "conf = (SparkConf()\n",
    "    .set('spark.app.name', f'perf_analysis_{appid}')\n",
    "    .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\n",
    "    .set('spark.executor.instances', '4')\n",
    "    .set('spark.executor.cores','4')\n",
    "    .set('spark.executor.memory', '8g')\n",
    "    .set('spark.driver.memory','20g')\n",
    "    .set('spark.memory.offHeap.enabled','True')\n",
    "    .set('spark.memory.offHeap.size','20g')\n",
    "    .set('spark.executor.memoryOverhead','1g')\n",
    "    .set('spark.executor.extraJavaOptions',\n",
    "          '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')\n",
    "    .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}\")\n",
    "    .set('spark.sql.inMemoryColumnarStorage.compressed','False')\n",
    "    .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')\n",
    "    .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')\n",
    "    .set('spark.sql.execution.arrow.pyspark.enabled','True')\n",
    "    .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')\n",
    "    .set(\"spark.sql.repl.eagerEval.enabled\", True)\n",
    "    .set(\"spark.sql.legacy.timeParserPolicy\",\"LEGACY\")     \n",
    "    .set(\"spark.sql.session.timeZone\", tz)\n",
    "       )\n",
    "\n",
    "sc = SparkContext(conf=conf,master='yarn')\n",
    "sc.setLogLevel(\"ERROR\")\n",
    "spark = SQLContext(sc)\n",
    "time.sleep(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "%run ~/PAUS/sparklog.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.environ[\"https_proxy\"] = proxy\n",
    "os.environ[\"http_proxy\"] = proxy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "emonmetric=['emon_cpuutil',\n",
    "            'emon_cpufreq',\n",
    "            'emon_instr_retired',\n",
    "            'emon_ipc']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "disk_prefix=[f\"'{dev}'\" for dev in disk.split(',')]\n",
    "nic_prefix=[f\"'{dev}'\" for dev in nic.split(',')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "display(HTML('<a href=#App-info> 1 App info</a>'))\n",
    "display(HTML(f\"<a href=#Compare-to{'-' + comp_name if comp_name else ''}> 2 Compare to {comp_name}</a>\"))\n",
    "display(HTML('<a href=#Config-compare> 3 Config compare</a>'))\n",
    "display(HTML('<a href=#Compare-to-baseline> 4 Compare to baseline</a>'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# App info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "app=Application_Run(appid, basedir=base_dir)\n",
    "appals=app.analysis['app']['als']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stats=appals.get_basic_state()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summary=app.get_summary(show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "display(summary.style)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "traceview=app.generate_trace_view(showemon=True,show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "appals.get_app_name()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "allconfs=appals.get_spark_config().to_dict()[0]\n",
    "if 'spark.plugins' in allconfs and allconfs['spark.plugins'] == 'org.apache.gluten.GlutenPlugin':\n",
    "    shuffle_df, dfx=appals.get_shuffle_stat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "appals.show_critical_path_time_breakdown().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if emails:\n",
    "    mail_list=' '.join(emails.split(','))\n",
    "    body,title=generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr)\n",
    "    !mail -a \"Content-type: text/html; charset=utf-8\" -s \"$title\" $mail_list < $body"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare to"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if comp_appid:\n",
    "    comp_app=Application_Run(comp_appid,basedir=comp_base_dir)\n",
    "    output=app.compare_app(rapp=comp_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "    display(HTML(output))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Config compare"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if comp_appid:\n",
    "    comp_appals=comp_app.analysis['app']['als']\n",
    "    display(comp_spark_conf(appals, comp_appals))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare to baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if baseline_appid:\n",
    "    baseline_app=Application_Run(baseline_appid,basedir=baseline_base_dir)\n",
    "    output=app.compare_app(rapp=baseline_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "    display(HTML(output))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  Convert to HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# htmlname=nb_name.replace(\"ipynb\",\"html\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "nbTranslate": {
   "displayLangs": [
    "*"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "fr",
   "useGoogleTranslate": true
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": false,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "197px",
    "left": "2188px",
    "top": "111px",
    "width": "269px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
